require(tidyverse)

############## Input to fig1.R and fig2.R: PSRM_simple_months_chg12.RData
fs <- list.files('./data/VIMP_ranger/','VIMP_2024.*-months.*FALSE_temp-chg12')

vimp <- NULL
for(f in fs) {
  load(paste0('./data/VIMP_ranger/',f))
  vimp <- vimp %>%
    bind_rows(vimpRes %>%
                mutate(relVimp = vimp / predErr) %>%
                group_by(vars,bsInd,outcome) %>%
                summarise(vimp = mean(vimp),
                          relVimp = mean(relVimp),.groups = 'drop'))
}

save(vimp,file = './data/VIMP_ranger/PSRM_simple_months_chg12.RData')


############## Input for fig4.R: combined_res.RData
fs <- list.files('./data/PDP_models/',pattern = paste0('RF_2024_outcome-(',paste(outs,collapse='|'),').*-month.*lag1'))

fs <- fs[-which(grepl('BIN_|ECON$|HAPPY|SAD|COMP',fs))]

issues <- NULL
raw <- NULL
for(f in fs[1:length(fs)]) {
  cat(which(fs == f),'\n')
  # stop()
  test <- try(load(paste0('./data/PDP_models/',f)))
  if(class(test) == 'try-error') {
    issues <- c(issues,f)
    next
  }
  predGrid <- expanderCustom(vals = vals,vars = c('PARTY'))
  
  if(nrow(predGrid) != 5) { next }
  preds <- predict(m,data = predGrid)
  
  predGrid$preds <- preds$predictions
  
  rankObs <- predGrid %>%
    mutate(ECON_ur = (ECON_ur*vals$ECON_ur$msd[2] + vals$ECON_ur$msd[1])) %>%
    mutate(date = as.Date(gsub('.*_period-|_temp.*\\.RData','',f))) %>%
    filter(PARTY != 'REF') %>%
    select(PARTY,preds) %>%
    arrange(preds)
  
  rankRefRD <- data.frame(PARTY = levels(rankObs$PARTY)) %>%
    filter(PARTY != 'REF') %>%
    mutate(predsEmpRD = rankObs$preds)
  
  rankRefDR <- data.frame(PARTY = rev(levels(rankObs$PARTY))) %>%
    filter(PARTY != 'REF') %>%
    mutate(predsEmpDR = rankObs$preds)
  
  raw <- rankObs %>%
    left_join(rankRefRD,by = 'PARTY') %>%
    left_join(rankRefDR,by = 'PARTY') %>%
    mutate(shiftRD = (preds - predsEmpRD)^2,
           shiftDR = (preds - predsEmpDR)^2) %>%
    mutate(date = as.Date(gsub('.*_period-|_temp.*\\.RData','',f)),
           outcome = gsub('.*outcome-|_period.*','',f)) %>%
    bind_rows(raw)
  
  rm(m,predGrid)
  gc()
  Sys.sleep(.1)
}

save(raw,file = './data/PDP_models/combined_res.RData')


############## Input to fig5.R: PSRM_comb_months_ECON_ENOUGHMON.RData
fs <- list.files('./data/VIMP_ranger/','VIMP_2024_outcome-.*(ECON|ENOUGHMON).*-months.*FALSE_temp-chg12')

vimp <- NULL
for(f in fs) {
  load(paste0('./data/VIMP_ranger/',f))
  vimp <- vimp %>%
    bind_rows(vimpRes)
}

save(vimp,file = './data/VIMP_ranger/PSRM_comb_months_ECON_ENOUGHMON.RData')


############## Input to fig6.R and figS6.R: PSRM_design_subset.RData
fs <- list.files('./data/VIMP_ranger/','VIMP_2024_outcome-.*(CUTBACKSPEND|MON|FEEL|SPEND).*-months.*_DumFacts-FALSE_temp-chg12')

vimp <- NULL
for(f in fs) {
  load(paste0('./data/VIMP_ranger/',f))
  vimp <- vimp %>%
    bind_rows(vimpRes)
}

save(vimp,file = './data/VIMP_ranger/PSRM_design_subset.RData')


############## Input to figS2.R: PSRM_lasso_comb.RData
comb <- coefs <- NULL
fs <- list.files('./data/VIMP_lasso/',pattern = 'LASSO_2024_outcome-.*_period-years_temp-lag12')
for(f in fs) {
  # stop()
  load(paste0('./data/VIMP_lasso/',f))
  
  tmp <- lassoRes %>%
    left_join(lambdaLookup %>%
                mutate(normInd = as.integer(gsub('s','',normInd)))) %>%
    group_by(vars,period,normInd,outcome) %>%
    summarise(lambda = mean(lambda),
              norm = mean(norm),
              dev.ratio = mean(dev.ratio),
              m = median(coef),
              lb = quantile(coef,.025),
              ub = quantile(coef,.975),.groups = 'drop')
  
  tmp <- tmp %>%
    left_join(coefRes %>%
                group_by(vars,outcome,period) %>%
                summarise(propIncl=n()/100,.groups = 'drop') %>%
                mutate(vars = gsub('(AGECAT|EDUC|GENDER|INC|MARST|PARTY|RACE)','\\1_',vars)),
              by = c('vars','period','outcome'))
  
  comb <- comb %>%
    bind_rows(tmp)
  
  coefs <- coefs %>%
    bind_rows(coefRes)
  
  rm(tmp)
  gc()
  
}

save(comb,coefs,file = './data/VIMP_lasso/PSRM_lasso_comb.RData')


############## Input to figS4.R and figS7.R: PSRM_comb_weeks_chg12.RData
fs <- list.files('./data/VIMP_ranger/','VIMP_2024_outcome-.*(CUTBACKSPEND|MON|FEEL|SPEND).*-weeks.*_DumFacts-FALSE_temp-chg12')

vimp <- NULL
gc()
for(f in fs) {
  load(paste0('./data/VIMP_ranger/',f))
  vimp <- vimp %>%
    bind_rows(vimpRes %>%
                mutate(period = as.Date(period)) %>%
                filter(period >= as.Date('2012-01-01'),
                       period <= as.Date('2014-01-01')))
  cat(which(fs == f),'\n')
}

save(vimp,lookup,file = './data/VIMP_ranger/PSRM_comb_weeks_chg12.RData')


############## Input to figS5.R and figS8.R: PSRM_comb_days_chg12.RData
fs <- list.files('./data/VIMP_ranger/','VIMP_2024_outcome-.*(CUTBACKSPEND|MON|FEEL|SPEND).*-days.*_DumFacts-FALSE_temp-chg12')

vimp <- NULL
gc()
for(f in fs[1:length(fs)]) {
  vimp <- vimp %>%
    bind_rows(vimpRes %>%
                mutate(period = as.Date(period)) %>%
                filter(period >= as.Date('2012-01-01'),
                       period <= as.Date('2014-01-01')))
  cat(which(fs == f),'\n')
}

save(vimp,lookup,file = './data/VIMP_ranger/PSRM_comb_days_chg12.RData')


# Dataset still too large for dataverse. Simplifying further.
load('./data/VIMP_ranger/PSRM_comb_days_chg12.RData')

toplot <- vimp %>%
  mutate(relVimp = vimp / predErr) %>%
  select(vars,outcome,period,n,relVimp) %>%
  group_by(vars,outcome,period) %>%
  summarise_all(list(mean = ~mean(.,na.rm=T),
                     lb = ~quantile(.,.005,na.rm=T),
                     ub = ~quantile(.,.995,na.rm=T))) %>%
  ungroup() %>%
  left_join(lookup)

save(toplot,file = './data/VIMP_ranger/PSRM_comb_days_chg12_small.RData')
